#Research Question: Do any supplements cause a significant mean decrease in human gut pH from week 1 (pre-supplement) to week 3 (post-supplement)?

Load packages

library(tidyverse)
library(readxl)
library(broom)
library(cowplot)
library(phyloseq); packageVersion("phyloseq")
set.seed(7)

#Import Data

#Import Data
shared_wkly <- read_delim(file = "raw_data/shared_wkly.txt",
                        delim = "\t", col_names = TRUE, trim_ws = TRUE, 
                        na = c("", "NA")) 
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Participant_ID = col_character(),
##   Semester = col_character(),
##   Study_week = col_character(),
##   Quantity_compliant = col_character(),
##   Frequency = col_character(),
##   Supplement_consumed = col_character()
## )
## See spec(...) for full column specifications.
biographical <- read_delim(file = "raw_data/biographical.txt",
                        delim = "\t", col_names = TRUE, trim_ws = TRUE, 
                        na = c("", "NA")) 
## Parsed with column specification:
## cols(
##   Participant_ID = col_character(),
##   Use_Data = col_character(),
##   Sex = col_character(),
##   Age = col_double(),
##   Race_ethnicity = col_character(),
##   Weight_kg = col_double(),
##   Height_meters = col_double(),
##   BMI = col_double()
## )
ph_wkly <- read_delim(file = "raw_data/DB_v_0.08/pH_wkly.txt",
                        delim = "\t", col_names = TRUE, trim_ws = TRUE, 
                        na = c("", "NA")) 
## Parsed with column specification:
## cols(
##   Participant_ID = col_character(),
##   Study_week = col_character(),
##   Status = col_character(),
##   Semester = col_character(),
##   Supplement_consumed = col_character(),
##   Use_Data = col_character(),
##   Quantity_compliant = col_character(),
##   Frequency = col_character(),
##   pH_median = col_double(),
##   pH_mean = col_double()
## )
#Look at pH Data
ph_wkly
## # A tibble: 687 x 10
##    Participant_ID Study_week Status Semester Supplement_cons… Use_Data
##    <chr>          <chr>      <chr>  <chr>    <chr>            <chr>   
##  1 U400           week3      during Fall2017 Banana           yes     
##  2 U401           week3      during Fall2017 Banana           yes     
##  3 U402           week3      during Fall2017 Banana           yes     
##  4 U404           week3      during Fall2017 Banana           yes     
##  5 U406           week3      during Fall2017 none             yes     
##  6 U407           week3      during Fall2017 Banana           yes     
##  7 U408           week3      during Fall2017 Banana           yes     
##  8 U411           week3      during Fall2017 Banana           yes     
##  9 U413           week3      during Fall2017 Banana           yes     
## 10 U414           week3      during Fall2017 Banana           yes     
## # … with 677 more rows, and 4 more variables: Quantity_compliant <chr>,
## #   Frequency <chr>, pH_median <dbl>, pH_mean <dbl>
count(ph_wkly, Supplement_consumed)
## # A tibble: 7 x 2
##   Supplement_consumed     n
##   <chr>               <int>
## 1 Banana                 32
## 2 BRMPS                 365
## 3 HiMaize+BRMPS          93
## 4 LOODAT                 35
## 5 none                   73
## 6 transition_HiMaize     87
## 7 <NA>                    2
count(ph_wkly, Frequency)
## # A tibble: 4 x 2
##   Frequency     n
##   <chr>     <int>
## 1 -            73
## 2 1xdaily     335
## 3 2xdaily     277
## 4 <NA>          2

There are 6 supplements: 1. Banana (n=32) 2. BRMPS (n=365) 3. HiMaize+BRMPS (n=93) 4. LOODAT (n=35) 5. none (n=73) 6. transition_HiMaize (n=87) and 2 with NA (remove)

#Curate pH Weekly Data

#Rename to our standards and filter data
curated_ph_wkly <- ph_wkly %>%
  rename_all(tolower) %>%
  filter(quantity_compliant == "yes" | quantity_compliant == "none",
         use_data == "yes",
         study_week == "week1" | study_week == "week3",
         frequency != "NA") 
View(curated_ph_wkly)
write_delim(curated_ph_wkly, path = "curated_data/curated_ph_wkly.txt", delim = "\t")

Now we have a common standard for all of our data. From here, we can further subset for specific conditions, but this data set contains the general conditions for all further calculations.

#Supplement Specific Assumption & Statistical Testing with Plots

It is important to note that there was no pH data for participants who consumed Bananas for week 1, and thus we could not compute a statistical analysis on it.

#2x Daily BRMPS


#Further Curation
brmps_wk1_2x <- curated_ph_wkly %>%
  filter(study_week == "week1", 
         supplement_consumed == "BRMPS", 
         frequency == "2xdaily") 


brmps_wk3_2x <- curated_ph_wkly %>%
  filter(study_week == "week3", 
         supplement_consumed == "BRMPS", 
         frequency == "2xdaily") 


#Normality
shapiro.test(brmps_wk1_2x$ph_mean) #p-value = .3826
## 
##  Shapiro-Wilk normality test
## 
## data:  brmps_wk1_2x$ph_mean
## W = 0.96172, p-value = 0.3826
shapiro.test(brmps_wk3_2x$ph_mean) #p-value = .006713
## 
##  Shapiro-Wilk normality test
## 
## data:  brmps_wk3_2x$ph_mean
## W = 0.93851, p-value = 0.006713
#Normality Plots
ggplot(brmps_wk1_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(brmps_wk3_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm(brmps_wk1_2x$ph_mean); qqline(brmps_wk1_2x$ph_mean)

qqnorm(brmps_wk3_2x$ph_mean); qqline(brmps_wk3_2x$ph_mean)

#Final Joined Data
brmps_2x <- inner_join(x = brmps_wk1_2x, y = brmps_wk3_2x,
                    by = c("participant_id", "frequency", 
                           "semester", "supplement_consumed", "quantity_compliant")) %>%
  rename(brmps_ph_mean_2x_wk1 = ph_mean.x,
         brmps_ph_mean_2x_wk3 = ph_mean.y) %>%
  select(-starts_with("study_week"))


#Sample Size + Varience Assumptions
str(brmps_2x) # n=27
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 27 obs. of  13 variables:
##  $ participant_id      : chr  "U500" "U501" "U502" "U507" ...
##  $ status.x            : chr  "before" "before" "before" "before" ...
##  $ semester            : chr  "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
##  $ supplement_consumed : chr  "BRMPS" "BRMPS" "BRMPS" "BRMPS" ...
##  $ use_data.x          : chr  "yes" "yes" "yes" "yes" ...
##  $ quantity_compliant  : chr  "yes" "yes" "yes" "yes" ...
##  $ frequency           : chr  "2xdaily" "2xdaily" "2xdaily" "2xdaily" ...
##  $ ph_median.x         : num  6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
##  $ brmps_ph_mean_2x_wk1: num  6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
##  $ status.y            : chr  "during" "during" "during" "during" ...
##  $ use_data.y          : chr  "yes" "yes" "yes" "yes" ...
##  $ ph_median.y         : num  6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
##  $ brmps_ph_mean_2x_wk3: num  6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
var.test(x = brmps_2x$brmps_ph_mean_2x_wk1, 
         y = brmps_2x$brmps_ph_mean_2x_wk3, 
         alternative = "greater") #p-value = .9512
## 
##  F test to compare two variances
## 
## data:  brmps_2x$brmps_ph_mean_2x_wk1 and brmps_2x$brmps_ph_mean_2x_wk3
## F = 0.51589, num df = 26, denom df = 26, p-value = 0.9512
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.2674087       Inf
## sample estimates:
## ratio of variances 
##          0.5158883
#Statistical Tests 
wilcox.test(x = brmps_2x$brmps_ph_mean_2x_wk1, 
            y = brmps_2x$brmps_ph_mean_2x_wk3, 
            alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x = brmps_2x$brmps_ph_mean_2x_wk1, y =
## brmps_2x$brmps_ph_mean_2x_wk3, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = brmps_2x$brmps_ph_mean_2x_wk1, y =
## brmps_2x$brmps_ph_mean_2x_wk3, : cannot compute exact p-value with zeroes
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  brmps_2x$brmps_ph_mean_2x_wk1 and brmps_2x$brmps_ph_mean_2x_wk3
## V = 252, p-value = 0.0002761
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.0002761


#BRMPS 2x Daily Plot
brmps2_plot <- curated_ph_wkly %>%
  filter(supplement_consumed == "BRMPS",
         study_week == "week1" | study_week == "week3",
         frequency == "2xdaily") %>% 
  ggplot(aes(x = study_week, 
             y = ph_mean)) + 
  geom_violin(aes(color = study_week)) +
  geom_jitter(aes(color = study_week))+
  labs(x = NULL,
       y = "Average weekly pH",
       title = "Change in pH after BRMPS Supplement (2x daily)") +
    theme(legend.position = "none")


#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
          plot = brmps2_plot,
          nrow = 1, ncol = 1,
          base_aspect_ratio = 1.1)


brmps2_plot

We have an extremely low p-value (less than alpha = .001) indicating that BRMPS taken 2x daily did lower pH in participants from week 1 to week 3 on average. In calculating the conclusion, we decided to use a non-parametric t-test (wilcox) because week 3 had non-normal data and week 1 did not follow the Normal Q-Q plot very accurately and the histogram had a slight bimodal look. Additionally because the sample size was less than 30 we decided to be safe and use the non-parametric test; variences were equal though for this test.

#2x Daily Transition Maize


#Further Curation
transition_wk1_2x <- curated_ph_wkly %>%
  filter(study_week == "week1", 
         supplement_consumed == "transition_HiMaize", 
         frequency == "2xdaily") 


transition_wk3_2x <- curated_ph_wkly %>%
  filter(study_week == "week3", 
         supplement_consumed == "transition_HiMaize", 
         frequency == "2xdaily") 


#Normaily + Varience Assumptions
shapiro.test(transition_wk1_2x$ph_mean) #p-value = .8910
## 
##  Shapiro-Wilk normality test
## 
## data:  transition_wk1_2x$ph_mean
## W = 0.98083, p-value = 0.891
shapiro.test(transition_wk3_2x$ph_mean) #p-value = .5083
## 
##  Shapiro-Wilk normality test
## 
## data:  transition_wk3_2x$ph_mean
## W = 0.96074, p-value = 0.5043
#Normality Plots
ggplot(transition_wk1_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(transition_wk3_2x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm(transition_wk1_2x$ph_mean); qqline(transition_wk1_2x$ph_mean)

qqnorm(transition_wk3_2x$ph_mean); qqline(transition_wk3_2x$ph_mean)

#Final Joined Data
transition_2x <- inner_join(x = transition_wk1_2x, y = transition_wk3_2x,
                    by = c("participant_id", "frequency", 
                           "semester", "supplement_consumed", "quantity_compliant")) %>%
  rename(transition_ph_mean_2x_wk1 = ph_mean.x,
         transition_ph_mean_2x_wk3 = ph_mean.y) %>%
  select(-starts_with("study_week"))


#Sample Size + Varience Assumptions
str(transition_2x) # n=21
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 21 obs. of  13 variables:
##  $ participant_id           : chr  "U547" "U548" "U550" "U551" ...
##  $ status.x                 : chr  "before" "before" "before" "before" ...
##  $ semester                 : chr  "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
##  $ supplement_consumed      : chr  "transition_HiMaize" "transition_HiMaize" "transition_HiMaize" "transition_HiMaize" ...
##  $ use_data.x               : chr  "yes" "yes" "yes" "yes" ...
##  $ quantity_compliant       : chr  "yes" "yes" "yes" "yes" ...
##  $ frequency                : chr  "2xdaily" "2xdaily" "2xdaily" "2xdaily" ...
##  $ ph_median.x              : num  7.05 6.4 5.8 6.6 6 6.6 6.4 6.4 6.6 6.8 ...
##  $ transition_ph_mean_2x_wk1: num  7.05 6.4 5.8 6.6 6 6.6 6.4 6.4 6.6 6.8 ...
##  $ status.y                 : chr  "during" "during" "during" "during" ...
##  $ use_data.y               : chr  "yes" "yes" "yes" "yes" ...
##  $ ph_median.y              : num  7.2 6.2 6.6 6.2 6.2 6.2 5.4 6.3 6 6.2 ...
##  $ transition_ph_mean_2x_wk3: num  7.2 6.2 6.6 6.2 6.2 6.2 5.4 6.3 5.87 6.2 ...
var.test(x = transition_2x$transition_ph_mean_2x_wk1, 
         y = transition_2x$transition_ph_mean_2x_wk3, 
         alternative = "greater") #p-value = .9395
## 
##  F test to compare two variances
## 
## data:  transition_2x$transition_ph_mean_2x_wk1 and transition_2x$transition_ph_mean_2x_wk3
## F = 0.49206, num df = 20, denom df = 20, p-value = 0.9395
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.2316492       Inf
## sample estimates:
## ratio of variances 
##          0.4920589
#Statistical Tests
t.test(x = transition_2x$transition_ph_mean_2x_wk1, y = transition_2x$transition_ph_mean_2x_wk3, 
       alternative = "greater", paired = TRUE, var.equal = TRUE) 
## 
##  Paired t-test
## 
## data:  transition_2x$transition_ph_mean_2x_wk1 and transition_2x$transition_ph_mean_2x_wk3
## t = 2.6789, df = 20, p-value = 0.007215
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.1031209       Inf
## sample estimates:
## mean of the differences 
##               0.2895238
#p-value = 0.007215


#Transition HiMaize 2x Daily Plot
transition_himaize_plot <- curated_ph_wkly %>%
  filter(supplement_consumed == "transition_HiMaize",
         study_week == "week1" | study_week == "week3") %>% 
  ggplot(aes(x = study_week, 
             y = ph_mean)) + 
  geom_violin(aes(color = study_week)) +
  geom_jitter(aes(color = study_week)) +
  labs(x = NULL,
       y = "Average weekly pH",
       title = "Change in pH after 2x Transition HiMaize") +
    theme(legend.position = "none")


#Save Plot
save_plot(filename = "transition_himaize_plot.pdf",
          plot = transition_himaize_plot,
          nrow = 1, ncol = 1,
          base_aspect_ratio = 1.1)


transition_himaize_plot

After running significance testing for Transition Maize taken 2x daily we found that there was a signifiant p-value (less than alpha = .01) providing evidence to reject the null hypothesis and conclude that this supplement did decrease the pH in participants from week 1 to week 3 on average. The sample size was not ideal, but both shapiro tests indicated that each subsetted data set was approximately normal which we believed was sufficient evidence to carry out a parametric test (t-test); the variences were equal for the data as well.

#1x Daily BRMPS 


#Further Curation
brmps_wk1_1x <- curated_ph_wkly %>%
  filter(study_week == "week1", 
         supplement_consumed == "BRMPS", 
         frequency == "1xdaily") 


brmps_wk3_1x <- curated_ph_wkly %>%
  filter(study_week == "week3", 
         supplement_consumed == "BRMPS", 
         frequency == "1xdaily") 


#Normaily + Varience Assumptions
shapiro.test(brmps_wk1_1x$ph_mean) #p-value = 0.01669
## 
##  Shapiro-Wilk normality test
## 
## data:  brmps_wk1_1x$ph_mean
## W = 0.9533, p-value = 0.01669
shapiro.test(brmps_wk3_1x$ph_mean) #p-value = .08154
## 
##  Shapiro-Wilk normality test
## 
## data:  brmps_wk3_1x$ph_mean
## W = 0.96627, p-value = 0.08154
#Normality Plots
ggplot(brmps_wk1_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(brmps_wk3_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm(brmps_wk1_1x$ph_mean); qqline(brmps_wk1_1x$ph_mean)

qqnorm(brmps_wk3_1x$ph_mean); qqline(brmps_wk3_1x$ph_mean)

#Final Joined Data
brmps_1x <- inner_join(x = brmps_wk1_1x, y = brmps_wk3_1x,
                    by = c("participant_id", "frequency", 
                           "semester", "supplement_consumed", "quantity_compliant")) %>%
  rename(brmps_ph_mean_1x_wk1 = ph_mean.x,
         brmps_ph_mean_1x_wk3 = ph_mean.y) %>%
  select(-starts_with("study_week"))


#Sample Size + Varience Assumptions
str(brmps_1x) # n=62
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 62 obs. of  13 variables:
##  $ participant_id      : chr  "U602" "U603" "U605" "U606" ...
##  $ status.x            : chr  "before" "before" "before" "before" ...
##  $ semester            : chr  "Fall2018" "Fall2018" "Fall2018" "Fall2018" ...
##  $ supplement_consumed : chr  "BRMPS" "BRMPS" "BRMPS" "BRMPS" ...
##  $ use_data.x          : chr  "yes" "yes" "yes" "yes" ...
##  $ quantity_compliant  : chr  "yes" "yes" "yes" "yes" ...
##  $ frequency           : chr  "1xdaily" "1xdaily" "1xdaily" "1xdaily" ...
##  $ ph_median.x         : num  6.38 7.25 7.5 7.5 6.88 7.25 7.5 6.25 6.75 5.88 ...
##  $ brmps_ph_mean_1x_wk1: num  6.38 7.25 7.5 7.5 6.88 7.25 7.5 6.25 6.75 5.88 ...
##  $ status.y            : chr  "during" "during" "during" "during" ...
##  $ use_data.y          : chr  "yes" "yes" "yes" "yes" ...
##  $ ph_median.y         : num  6.38 6.88 7.12 6.75 7.75 7 6.5 6.12 7.5 6.25 ...
##  $ brmps_ph_mean_1x_wk3: num  6.38 6.88 7.12 6.75 7.75 7 6.5 6.12 7.5 6.25 ...
var.test(x = brmps_1x$brmps_ph_mean_1x_wk1, 
         y = brmps_1x$brmps_ph_mean_1x_wk3, 
         alternative = "greater") #p-value = .6063
## 
##  F test to compare two variances
## 
## data:  brmps_1x$brmps_ph_mean_1x_wk1 and brmps_1x$brmps_ph_mean_1x_wk3
## F = 0.93301, num df = 61, denom df = 61, p-value = 0.6063
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.6102759       Inf
## sample estimates:
## ratio of variances 
##            0.93301
#Statistical Tests 
t.test(x = brmps_1x$brmps_ph_mean_1x_wk1, y = brmps_1x$brmps_ph_mean_1x_wk3, 
       alternative = "greater", paired = TRUE, var.equal = TRUE) 
## 
##  Paired t-test
## 
## data:  brmps_1x$brmps_ph_mean_1x_wk1 and brmps_1x$brmps_ph_mean_1x_wk3
## t = 1.2214, df = 61, p-value = 0.1133
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.03371922         Inf
## sample estimates:
## mean of the differences 
##              0.09177419
#p-value = 0.1133


#BRMPS 1x Daily Plot
brmps1_plot <- curated_ph_wkly %>%
  filter(supplement_consumed == "BRMPS",
         study_week == "week1" | study_week == "week3",
         frequency == "1xdaily") %>% 
  ggplot(aes(x = study_week, 
             y = ph_mean)) + 
  geom_violin(aes(color = study_week)) +
  geom_jitter(aes(color = study_week))+
  labs(x = NULL,
       y = "Average weekly pH",
       title = "Change in pH after BRMPS Supplement (1x daily)") +
    theme(legend.position = "none")


#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
          plot = brmps1_plot,
          nrow = 1, ncol = 1,
          base_aspect_ratio = 1.1)


brmps1_plot

Although close to our alpha value of .10, we still could not reject our null hypothesis because the p-value was higher than the alpha value. Thus, BRMPS consumed 1x daily did not appear to significantly decrease the pH in participants’ guts on average from week 1 to week 3. We decided that a parametric test would be sufficient since the qq plots showed normal data and the histograms appeared to be approximately normal, even thought the sample size was not greater than 30. Additionally, we found the variences to be equal between the subsetted data sets.

#1x Daily HiMaize+BRMPS


#Further Curation
himaize_brmps_wk1_1x <- curated_ph_wkly %>%
  filter(study_week == "week1", 
         supplement_consumed == "HiMaize+BRMPS", 
         frequency == "1xdaily") 


himaize_brmps_wk3_1x <- curated_ph_wkly %>%
  filter(study_week == "week3", 
         supplement_consumed == "HiMaize+BRMPS", 
         frequency == "1xdaily") 


#Normaily + Varience Assumptions
shapiro.test(himaize_brmps_wk1_1x$ph_mean) #p-value = .2955
## 
##  Shapiro-Wilk normality test
## 
## data:  himaize_brmps_wk1_1x$ph_mean
## W = 0.95021, p-value = 0.2955
shapiro.test(himaize_brmps_wk3_1x$ph_mean) #p-value = .05622
## 
##  Shapiro-Wilk normality test
## 
## data:  himaize_brmps_wk3_1x$ph_mean
## W = 0.91655, p-value = 0.05622
#Normality Plots
ggplot(himaize_brmps_wk1_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(himaize_brmps_wk3_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm(himaize_brmps_wk1_1x$ph_mean); qqline(himaize_brmps_wk1_1x$ph_mean)

qqnorm(himaize_brmps_wk3_1x$ph_mean); qqline(himaize_brmps_wk3_1x$ph_mean)

#Final Joined Data
himaize_brmps_1x <- inner_join(x = himaize_brmps_wk1_1x, y = himaize_brmps_wk3_1x,
                    by = c("participant_id", "frequency", 
                           "semester", "supplement_consumed", "quantity_compliant")) %>%
  rename(himaize_brmps_ph_mean_1x_wk1 = ph_mean.x,
         himaize_brmps_ph_mean_1x_wk3 = ph_mean.y) %>%
  select(-starts_with("study_week"))


#Sample Size + Varience Assumptions
str(himaize_brmps_1x) # n=23
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 23 obs. of  13 variables:
##  $ participant_id              : chr  "U643" "U644" "U645" "U646" ...
##  $ status.x                    : chr  "before" "before" "before" "before" ...
##  $ semester                    : chr  "Fall2018" "Fall2018" "Fall2018" "Fall2018" ...
##  $ supplement_consumed         : chr  "HiMaize+BRMPS" "HiMaize+BRMPS" "HiMaize+BRMPS" "HiMaize+BRMPS" ...
##  $ use_data.x                  : chr  "yes" "yes" "yes" "yes" ...
##  $ quantity_compliant          : chr  "yes" "yes" "yes" "yes" ...
##  $ frequency                   : chr  "1xdaily" "1xdaily" "1xdaily" "1xdaily" ...
##  $ ph_median.x                 : num  7.75 7.5 7.1 6.5 6.62 7.65 6.75 7.38 6.88 7.75 ...
##  $ himaize_brmps_ph_mean_1x_wk1: num  7.75 7.5 7.1 6.5 6.62 7.65 6.75 7.38 6.88 7.75 ...
##  $ status.y                    : chr  "during" "during" "during" "during" ...
##  $ use_data.y                  : chr  "yes" "yes" "yes" "yes" ...
##  $ ph_median.y                 : num  6.38 7.25 7.5 7.75 6.25 7.5 6.5 7.38 6 6.75 ...
##  $ himaize_brmps_ph_mean_1x_wk3: num  6.38 7.25 7.5 7.75 6.25 7.5 6.5 7.38 6 6.75 ...
var.test(x = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1, 
         y = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3, 
         alternative = "greater") #p-value = .489
## 
##  F test to compare two variances
## 
## data:  himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1 and himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3
## F = 1.012, num df = 22, denom df = 22, p-value = 0.489
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.4941971       Inf
## sample estimates:
## ratio of variances 
##           1.012002
#Statistical Tests 
wilcox.test(x = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1, 
            y = himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3, 
            alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x =
## himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1, : cannot compute exact p-
## value with ties
## Warning in wilcox.test.default(x =
## himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1, : cannot compute exact p-
## value with zeroes
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk1 and himaize_brmps_1x$himaize_brmps_ph_mean_1x_wk3
## V = 162.5, p-value = 0.05289
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.05289


#HiMaize+BRMPS 1x Daily Plot
himaize_brmps_plot <- curated_ph_wkly %>%
  filter(supplement_consumed == "HiMaize+BRMPS",
         study_week == "week1" | study_week == "week3") %>% 
  ggplot(aes(x = study_week, 
             y = ph_mean)) + 
  geom_violin(aes(color = study_week)) +
  geom_jitter(aes(color = study_week))+
  labs(x = NULL,
       y = "Average weekly pH",
       title = "Change in pH after HiMaize+BRMPS Supplement") +
    theme(legend.position = "none")


#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
          plot = himaize_brmps_plot,
          nrow = 1, ncol = 1,
          base_aspect_ratio = 1.1)


himaize_brmps_plot

Since we are looking at complex and diverse biological systems, we thought it was reasonable to use an alpha value of .10. Thus, in this case we found a significant p-value that was less than alpha = .10 providing us with sufficient evidence to reject the null hypothesis and accept the alternative concluding that consuming HiMaize+BRMPS supplement 1x daily did decrease pH on average in individuals from week 1 to week 3. We used a non-parametric test (wilcox) with equal variences because week 3 had non-normal data according to the shapiro test and the graph for week 1 appeared to be bimodal in addition to a low sample size.

#1x Daily LOODAT


#Further Curation
loodat_wk1_1x <- curated_ph_wkly %>%
  filter(study_week == "week1", 
         supplement_consumed == "LOODAT", 
         frequency == "1xdaily") 


loodat_wk3_1x <- curated_ph_wkly %>%
  filter(study_week == "week3", 
         supplement_consumed == "LOODAT", 
         frequency == "1xdaily") 


#Normaily + Varience Assumptions
shapiro.test(loodat_wk1_1x$ph_mean) #p-value = .1888
## 
##  Shapiro-Wilk normality test
## 
## data:  loodat_wk1_1x$ph_mean
## W = 0.91943, p-value = 0.1888
shapiro.test(loodat_wk3_1x$ph_mean) #p-value = .0213
## 
##  Shapiro-Wilk normality test
## 
## data:  loodat_wk3_1x$ph_mean
## W = 0.85622, p-value = 0.0213
#Normality Plots
ggplot(loodat_wk1_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(loodat_wk3_1x, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm(loodat_wk1_1x$ph_mean); qqline(loodat_wk1_1x$ph_mean)

qqnorm(loodat_wk3_1x$ph_mean); qqline(loodat_wk3_1x$ph_mean)

#Final Joined Data
loodat_1x <- inner_join(x = loodat_wk1_1x, y = loodat_wk3_1x,
                    by = c("participant_id", "frequency", 
                           "semester", "supplement_consumed", "quantity_compliant")) %>%
  rename(loodat_ph_mean_1x_wk1 = ph_mean.x,
         loodat_ph_mean_1x_wk3 = ph_mean.y) %>%
  select(-starts_with("study_week"))


#Sample Size + Varience Assumptions
str(loodat_1x) # n=15
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 15 obs. of  13 variables:
##  $ participant_id       : chr  "U701" "U706" "U707" "U708" ...
##  $ status.x             : chr  "before" "before" "before" "before" ...
##  $ semester             : chr  "Winter2019" "Winter2019" "Winter2019" "Winter2019" ...
##  $ supplement_consumed  : chr  "LOODAT" "LOODAT" "LOODAT" "LOODAT" ...
##  $ use_data.x           : chr  "yes" "yes" "yes" "yes" ...
##  $ quantity_compliant   : chr  "yes" "yes" "yes" "yes" ...
##  $ frequency            : chr  "1xdaily" "1xdaily" "1xdaily" "1xdaily" ...
##  $ ph_median.x          : num  7.5 7.5 7.5 7 7.25 5.25 6.75 7.25 6 6.25 ...
##  $ loodat_ph_mean_1x_wk1: num  7.33 7.42 7.08 7 7.33 5.33 6.67 7.25 6 6.5 ...
##  $ status.y             : chr  "during" "during" "during" "during" ...
##  $ use_data.y           : chr  "yes" "yes" "yes" "yes" ...
##  $ ph_median.y          : num  7.25 7.5 7.62 7 7.5 6.25 7.25 7.25 6.25 6.25 ...
##  $ loodat_ph_mean_1x_wk3: num  7.5 7.33 7.62 7.17 7.17 6.25 7.17 7.25 6.17 6.25 ...
var.test(x = loodat_1x$loodat_ph_mean_1x_wk1, 
         y = loodat_1x$loodat_ph_mean_1x_wk3, 
         alternative = "greater") #p-value = .3212
## 
##  F test to compare two variances
## 
## data:  loodat_1x$loodat_ph_mean_1x_wk1 and loodat_1x$loodat_ph_mean_1x_wk3
## F = 1.2878, num df = 14, denom df = 14, p-value = 0.3212
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.5184902       Inf
## sample estimates:
## ratio of variances 
##           1.287787
#Statistical Tests 
wilcox.test(x = loodat_1x$loodat_ph_mean_1x_wk1, 
            y = loodat_1x$loodat_ph_mean_1x_wk3, 
            alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x = loodat_1x$loodat_ph_mean_1x_wk1, y =
## loodat_1x$loodat_ph_mean_1x_wk3, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = loodat_1x$loodat_ph_mean_1x_wk1, y =
## loodat_1x$loodat_ph_mean_1x_wk3, : cannot compute exact p-value with zeroes
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  loodat_1x$loodat_ph_mean_1x_wk1 and loodat_1x$loodat_ph_mean_1x_wk3
## V = 27.5, p-value = 0.9024
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.9024


#LOODAT 1x Daily Plot
loodat_plot <- curated_ph_wkly %>%
  filter(supplement_consumed == "LOODAT",
         study_week == "week1" | study_week == "week3",
         frequency == "1xdaily") %>% 
  ggplot(aes(x = study_week, 
             y = ph_mean)) + 
  geom_violin(aes(color = study_week)) +
  geom_jitter(aes(color = study_week))+
  labs(x = NULL,
       y = "Average weekly pH",
       title = "Change in pH after LOODAT Supplement") +
    theme(legend.position = "none")


#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
          plot = loodat_plot,
          nrow = 1, ncol = 1,
          base_aspect_ratio = 1.1)


loodat_plot

The p-value here was far from significant, which would have required it to be lower than alpha = .10. Since it was above this value, we cannot reject the null hypothesis and therefore must conclude that consuming LOODAT 1x daily did not decrease the pH on average in participants’ guts from week 1 to week 3. We used a non-parametric test (wilcox) with equal variences because both subsetted data sets had bimodal data and there was a low sample size.

#No Supplement Consumed 


#Further Curation
no_supplement_wk1 <- curated_ph_wkly %>%
  filter(study_week == "week1", 
         supplement_consumed == "none", 
         frequency == "-") 


no_supplement_wk3 <- curated_ph_wkly %>%
  filter(study_week == "week3", 
         supplement_consumed == "none", 
         frequency == "-") 


#Normaily + Varience Assumptions
shapiro.test(no_supplement_wk1$ph_mean) #p-value = .1029
## 
##  Shapiro-Wilk normality test
## 
## data:  no_supplement_wk1$ph_mean
## W = 0.93318, p-value = 0.1029
shapiro.test(no_supplement_wk3$ph_mean) #p-value = .03968
## 
##  Shapiro-Wilk normality test
## 
## data:  no_supplement_wk3$ph_mean
## W = 0.93654, p-value = 0.03968
#Normality Plots
ggplot(no_supplement_wk1, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(no_supplement_wk3, aes(x = ph_mean)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm(no_supplement_wk1$ph_mean); qqline(no_supplement_wk1$ph_mean)

qqnorm(no_supplement_wk3$ph_mean); qqline(no_supplement_wk3$ph_mean)

#Final Joined Data
no_supplement <- inner_join(x = no_supplement_wk1, y = no_supplement_wk3,
                    by = c("participant_id", "frequency", 
                           "semester", "supplement_consumed", "quantity_compliant")) %>%
  rename(no_supplement_ph_mean_wk1 = ph_mean.x,
         no_supplement_ph_mean_wk3 = ph_mean.y) %>%
  select(-starts_with("study_week"))
no_supplement
## # A tibble: 24 x 13
##    participant_id status.x semester supplement_cons… use_data.x
##    <chr>          <chr>    <chr>    <chr>            <chr>     
##  1 U513           before   Winter2… none             yes       
##  2 U544           before   Winter2… none             yes       
##  3 U562           before   Winter2… none             yes       
##  4 U566           before   Winter2… none             yes       
##  5 U618           before   Fall2018 none             yes       
##  6 U637           before   Fall2018 none             yes       
##  7 U650           before   Fall2018 none             yes       
##  8 U663           before   Fall2018 none             yes       
##  9 U665           before   Fall2018 none             yes       
## 10 U668           before   Fall2018 none             yes       
## # … with 14 more rows, and 8 more variables: quantity_compliant <chr>,
## #   frequency <chr>, ph_median.x <dbl>, no_supplement_ph_mean_wk1 <dbl>,
## #   status.y <chr>, use_data.y <chr>, ph_median.y <dbl>,
## #   no_supplement_ph_mean_wk3 <dbl>
#Sample Size + Varience Assumptions
str(no_supplement) # n=24
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 24 obs. of  13 variables:
##  $ participant_id           : chr  "U513" "U544" "U562" "U566" ...
##  $ status.x                 : chr  "before" "before" "before" "before" ...
##  $ semester                 : chr  "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
##  $ supplement_consumed      : chr  "none" "none" "none" "none" ...
##  $ use_data.x               : chr  "yes" "yes" "yes" "yes" ...
##  $ quantity_compliant       : chr  "none" "none" "none" "none" ...
##  $ frequency                : chr  "-" "-" "-" "-" ...
##  $ ph_median.x              : num  6.5 6.8 6.2 6.2 7.5 6.12 6.25 6.38 7.25 8 ...
##  $ no_supplement_ph_mean_wk1: num  6.5 6.8 6.2 6.2 7.5 6.12 6.25 6.38 7.25 8 ...
##  $ status.y                 : chr  "during" "during" "during" "during" ...
##  $ use_data.y               : chr  "yes" "yes" "yes" "yes" ...
##  $ ph_median.y              : num  6.55 6.4 6.2 7 7.75 6 7.75 7.5 6.5 8 ...
##  $ no_supplement_ph_mean_wk3: num  6.55 6.4 6.2 7 7.75 6 7.75 7.5 6.5 8 ...
var.test(x = no_supplement$no_supplement_ph_mean_wk1, 
         y = no_supplement$no_supplement_ph_mean_wk3, 
         alternative = "greater") #p-value = 0.2721
## 
##  F test to compare two variances
## 
## data:  no_supplement$no_supplement_ph_mean_wk1 and no_supplement$no_supplement_ph_mean_wk3
## F = 1.2918, num df = 23, denom df = 23, p-value = 0.2721
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.6412776       Inf
## sample estimates:
## ratio of variances 
##           1.291806
#Statistical Tests 
wilcox.test(x = no_supplement$no_supplement_ph_mean_wk1, 
            y = no_supplement$no_supplement_ph_mean_wk3, 
            alternative = "greater", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x =
## no_supplement$no_supplement_ph_mean_wk1, : cannot compute exact p-value
## with ties
## Warning in wilcox.test.default(x =
## no_supplement$no_supplement_ph_mean_wk1, : cannot compute exact p-value
## with zeroes
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  no_supplement$no_supplement_ph_mean_wk1 and no_supplement$no_supplement_ph_mean_wk3
## V = 94, p-value = 0.7781
## alternative hypothesis: true location shift is greater than 0
#p-value = 0.7781


#No Supplement Plot
no_supplement_plot <- curated_ph_wkly %>%
  filter(supplement_consumed == "none",
         study_week == "week1" | study_week == "week3") %>% 
  ggplot(aes(x = study_week, 
             y = ph_mean)) + 
  geom_violin(aes(color = study_week)) +
  geom_jitter(aes(color = study_week))+
  labs(x = NULL,
       y = "Average weekly pH",
       title = "Change in pH without Supplement") +
    theme(legend.position = "none")


#Save Plot
save_plot(filename = "figures/no_supplement_plot.pdf",
          plot = no_supplement_plot,
          nrow = 1, ncol = 1,
          base_aspect_ratio = 1.1)


no_supplement_plot

In testing our control (no supplement) group, we hypothesized that there would be no significant result, and that is exactly what we got as the p-value was greater than alpha = .10. Thus we accept the null hypothesis stating that there was not a decrease on average for participants who did not consume a supplement from week 1 to week 3. We surprisingly found the data sets to be bimodal when we graphed them and did we not have a large sample size leading us to use a non-parametric (wilcox) statistical test. In addition, we found the variences to be equal between the two subsetted data sets.

#Finding the Mean Difference in pH between Week 3 and Week 1 for Each Supplement (Week 3 - Week 1)

The fact that some supplements caused a significant decrease in pH and others did not suggests that not all supplements produce the same results and that there might be an overarching supplement that caused the lowest drop in pH. Our goal here is to determine which supplement causes the largest mean difference in pH. We will only test the statistically significant data sets found above:

#BRMPS 2x Daily Mean pH Difference
mean_diff_brmps_2x <- brmps_2x %>%
  select(participant_id, brmps_ph_mean_2x_wk1, brmps_ph_mean_2x_wk3) %>%
  mutate(ph_difference = brmps_ph_mean_2x_wk3 - brmps_ph_mean_2x_wk1) %>%
  summarize(avg_ph_difference = mean(ph_difference))

View(mean_diff_brmps_2x)

#pH Difference = -0.3414815

This calculation tells us that participants who consumed BRMPS 2x daily had an average decrease of .34 in pH from week 1 to week 3.

#Transition Maize 2x Daily Mean pH Difference
mean_diff_transition_2x <- transition_2x %>%
  select(participant_id, transition_ph_mean_2x_wk1, transition_ph_mean_2x_wk3) %>%
  mutate(ph_difference = transition_ph_mean_2x_wk3 - transition_ph_mean_2x_wk1) %>%
  summarize(avg_ph_difference = mean(ph_difference))

View(mean_diff_transition_2x)

#pH Difference = -0.2895238

This calculation tells us that participants who consumed Transition Maize 2x daily had an average decrease of .29 in pH from week 1 to week 3.

#HiMaize+BRMPS 1x Daily Mean pH Difference
mean_diff_himaize_brmps_1x <- himaize_brmps_1x %>%
  select(participant_id, himaize_brmps_ph_mean_1x_wk1, himaize_brmps_ph_mean_1x_wk3) %>%
  mutate(ph_difference = himaize_brmps_ph_mean_1x_wk3 - himaize_brmps_ph_mean_1x_wk1) %>%
  summarize(avg_ph_difference = mean(ph_difference))

View(mean_diff_himaize_brmps_1x)

#pH Difference = -0.2556522

This calculation tells us that participants who consumed HiMaize+BRMPS 1x daily had an average decrease of .26 in pH from week 1 to week 3.

As we can see, 2x Daily BRMPS causes the largest mean decrease in pH from weak 1 to weak 3, and thus we will focus on this supplement for the remainder of our statistical study.

#Species Diversity in BRMPS 2x Daily Individuals

#Import Data
seq_var_table <- read_delim("raw_data/species_avg_shared.txt",
                            delim = "\t", escape_double = FALSE, 
                            trim_ws = TRUE, na=c("NA"),
                            col_types = list()) %>%
  rename_all(tolower) %>%
  select(participant_id, study_week, semester, starts_with("Lactoc"), starts_with("Lactob"))
seq_var_table
## # A tibble: 1,202 x 44
##    participant_id study_week semester `lactococcus la… lactococcus
##    <chr>          <chr>      <chr>               <dbl>       <dbl>
##  1 U042           week1      Fall2015              0             0
##  2 U042           week3      Fall2015              0             0
##  3 U044           week3      Fall2015              1.5           0
##  4 U046           week3      Fall2015              0             0
##  5 U048           week3      Fall2015              0             0
##  6 U050           week3      Fall2015              0             0
##  7 U051           week1      Fall2015              0             0
##  8 U052           week1      Fall2015              0             0
##  9 U052           week3      Fall2015             13             0
## 10 U053           week1      Fall2015              4             0
## # … with 1,192 more rows, and 39 more variables: `lactococcus
## #   raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## #   garvieae` <dbl>, `lactobacillus pontis` <dbl>, `lactobacillus
## #   murinus` <dbl>, `lactobacillus sanfranciscensis` <dbl>, `lactobacillus
## #   johnsonii` <dbl>, lactobacillaceae <dbl>, `lactobacillus
## #   ruminis` <dbl>, `lactobacillus salivarius` <dbl>, `lactobacillus
## #   timonensis` <dbl>, lactobacillus <dbl>, `lactobacillus
## #   crispatus/gallinarum` <dbl>, `lactobacillus gallinarum` <dbl>,
## #   `lactobacillus iners` <dbl>, `lactobacillus vaginalis` <dbl>,
## #   `lactobacillus crispatus` <dbl>, `lactobacillus
## #   apodemi/murinus` <dbl>, `lactobacillus fermentum` <dbl>,
## #   `lactobacillus gasseri/johnsonii` <dbl>, lactobacillales <dbl>,
## #   `lactobacillus casei` <dbl>, `lactobacillus brevis` <dbl>,
## #   `lactobacillus buchneri` <dbl>, `lactobacillus delbrueckii` <dbl>,
## #   `lactobacillus apis` <dbl>, `lactobacillus hokkaidonensis` <dbl>,
## #   `lactobacillus agilis` <dbl>, `lactobacillus casei group` <dbl>,
## #   `lactobacillus rhamnosus` <dbl>, `lactobacillus
## #   kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## #   `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## #   `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## #   `lactobacillus algidus` <dbl>, `lactobacillus
## #   acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>
#Lactobacillus


#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
  filter(supplement_consumed == "BRMPS",
         frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
##    participant_id study_week status semester supplement_cons… use_data
##    <chr>          <chr>      <chr>  <chr>    <chr>            <chr>   
##  1 U445           week3      during Fall2017 BRMPS            yes     
##  2 U446           week3      during Fall2017 BRMPS            yes     
##  3 U448           week3      during Fall2017 BRMPS            yes     
##  4 U449           week3      during Fall2017 BRMPS            yes     
##  5 U450           week3      during Fall2017 BRMPS            yes     
##  6 U452           week3      during Fall2017 BRMPS            yes     
##  7 U453           week3      during Fall2017 BRMPS            yes     
##  8 U455           week3      during Fall2017 BRMPS            yes     
##  9 U458           week3      during Fall2017 BRMPS            yes     
## 10 U460           week3      during Fall2017 BRMPS            yes     
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## #   frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactobacillus <- inner_join(seq_var_table, brmps_2x_phylo) %>% #combine two phyloseq objects created above
  select(-starts_with("lactoco")) #get rid of lactococcus
## Joining, by = c("participant_id", "study_week", "semester")
lactobacillus
## # A tibble: 54 x 46
##    participant_id study_week semester `lactobacillus … `lactobacillus …
##    <chr>          <chr>      <chr>               <dbl>            <dbl>
##  1 U500           week1      Winter2…                0                0
##  2 U500           week3      Winter2…                0                0
##  3 U501           week1      Winter2…                0                0
##  4 U501           week3      Winter2…                0                0
##  5 U502           week1      Winter2…                0                0
##  6 U502           week3      Winter2…                0                0
##  7 U507           week1      Winter2…                0                0
##  8 U507           week3      Winter2…                0                0
##  9 U509           week1      Winter2…                0                0
## 10 U509           week3      Winter2…                0                0
## # … with 44 more rows, and 41 more variables: `lactobacillus
## #   sanfranciscensis` <dbl>, `lactobacillus johnsonii` <dbl>,
## #   lactobacillaceae <dbl>, `lactobacillus ruminis` <dbl>, `lactobacillus
## #   salivarius` <dbl>, `lactobacillus timonensis` <dbl>,
## #   lactobacillus <dbl>, `lactobacillus crispatus/gallinarum` <dbl>,
## #   `lactobacillus gallinarum` <dbl>, `lactobacillus iners` <dbl>,
## #   `lactobacillus vaginalis` <dbl>, `lactobacillus crispatus` <dbl>,
## #   `lactobacillus apodemi/murinus` <dbl>, `lactobacillus
## #   fermentum` <dbl>, `lactobacillus gasseri/johnsonii` <dbl>,
## #   lactobacillales <dbl>, `lactobacillus casei` <dbl>, `lactobacillus
## #   brevis` <dbl>, `lactobacillus buchneri` <dbl>, `lactobacillus
## #   delbrueckii` <dbl>, `lactobacillus apis` <dbl>, `lactobacillus
## #   hokkaidonensis` <dbl>, `lactobacillus agilis` <dbl>, `lactobacillus
## #   casei group` <dbl>, `lactobacillus rhamnosus` <dbl>, `lactobacillus
## #   kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## #   `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## #   `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## #   `lactobacillus algidus` <dbl>, `lactobacillus
## #   acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>,
## #   status <chr>, supplement_consumed <chr>, use_data <chr>,
## #   quantity_compliant <chr>, frequency <chr>, ph_median <dbl>,
## #   ph_mean <dbl>
#Calculate Row Sums
lactobacillus_total <-  rowSums(lactobacillus[,4:39]) 
lactobacillus_total
##  [1]  22.00 280.50   0.00   0.00   0.00   0.00   0.00   0.00   5.33  13.67
## [11]   0.00   0.00   0.00  48.50   5.00  81.33   1.00   0.00   0.00 115.50
## [21]   0.00   0.00  59.00  56.00   0.00   2.33   0.00   0.00 188.00  31.33
## [31]  36.50 474.66  27.00   6.67   0.00   0.00   0.00  24.00  23.34  47.66
## [41]   0.67   1.67   3.00   0.00   0.00   0.00   0.00 340.00  29.66  77.99
## [51] 116.00 565.67   0.00   0.67
#Mutate Row Sums into Previous Table
new_lactobacillus <- lactobacillus %>% 
  mutate(lactobacillus = lactobacillus_total) %>%
  select(participant_id, study_week, semester, lactobacillus, ph_median, ph_mean, frequency, supplement_consumed)
new_lactobacillus
## # A tibble: 54 x 8
##    participant_id study_week semester lactobacillus ph_median ph_mean
##    <chr>          <chr>      <chr>            <dbl>     <dbl>   <dbl>
##  1 U500           week1      Winter2…         22         6.6     6.6 
##  2 U500           week3      Winter2…        280.        6.45    6.45
##  3 U501           week1      Winter2…          0         6.6     6.6 
##  4 U501           week3      Winter2…          0         6.2     6.2 
##  5 U502           week1      Winter2…          0         6.4     6.4 
##  6 U502           week3      Winter2…          0         6.4     6.4 
##  7 U507           week1      Winter2…          0         6.7     6.7 
##  8 U507           week3      Winter2…          0         6.4     6.4 
##  9 U509           week1      Winter2…          5.33      6.8     6.8 
## 10 U509           week3      Winter2…         13.7       6       6   
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## #   supplement_consumed <chr>
#Correlation Data With Graph
wk_3_lactobacillus <- new_lactobacillus %>%
  filter(study_week == "week3")


wk_3_lactobacillus_plot <- wk_3_lactobacillus %>%
  ggplot(aes(x = ph_mean,
             y = lactobacillus)) + 
  geom_point() + #puts data points to match x and y coordinates
  geom_smooth(method = "lm", #used to create a linear best fit line
              se = FALSE) + #hides confidence interval around line 
  xlab("Week 3 Mean pH") + 
  ylab("Relative Abundance of Lactobacillus Bacteria") 
wk_3_lactobacillus_plot

#Correlation Test
lactobacillus_correlation <- wk_3_lactobacillus %>%
  lm(ph_mean ~ lactobacillus, data = .) #test relationship
summary(lactobacillus_correlation) #view results 
## 
## Call:
## lm(formula = ph_mean ~ lactobacillus, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9645 -0.3357  0.1972  0.2716  1.0468 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.1284049  0.1065108  57.538   <2e-16 ***
## lactobacillus 0.0004436  0.0006286   0.706    0.487    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4872 on 25 degrees of freedom
## Multiple R-squared:  0.01953,    Adjusted R-squared:  -0.01969 
## F-statistic: 0.4979 on 1 and 25 DF,  p-value: 0.4869
lactobacillus_correlation
## 
## Call:
## lm(formula = ph_mean ~ lactobacillus, data = .)
## 
## Coefficients:
##   (Intercept)  lactobacillus  
##     6.1284049      0.0004436
#p-value = .4869
#R-squared = .01953

Just looking at the scatter plot we can see that there does not appear to be any correlation or pattern between the dots, as they appear to be randomly scattered. Our statistical analysis confirms this as the large p-value (p-value = .4869) indicates that the null hypothesis is true meaning there is no correlation between pH and Lactobacillus genre bacteria. Additionally, the small r-squared value indicates that only about 1.9% of the data can be truly represented by the line of best fit, which is extremely small and inaccurate. If there were to be a correlation, it would be a positive one according to the statistical analysis which is opposite to what we hypothesized, as here the number of Lactobacillus bacteria increases as pH increases.

#Lactococcus 


#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
  filter(supplement_consumed == "BRMPS",
         frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
##    participant_id study_week status semester supplement_cons… use_data
##    <chr>          <chr>      <chr>  <chr>    <chr>            <chr>   
##  1 U445           week3      during Fall2017 BRMPS            yes     
##  2 U446           week3      during Fall2017 BRMPS            yes     
##  3 U448           week3      during Fall2017 BRMPS            yes     
##  4 U449           week3      during Fall2017 BRMPS            yes     
##  5 U450           week3      during Fall2017 BRMPS            yes     
##  6 U452           week3      during Fall2017 BRMPS            yes     
##  7 U453           week3      during Fall2017 BRMPS            yes     
##  8 U455           week3      during Fall2017 BRMPS            yes     
##  9 U458           week3      during Fall2017 BRMPS            yes     
## 10 U460           week3      during Fall2017 BRMPS            yes     
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## #   frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactococcus <- inner_join(seq_var_table, brmps_2x_phylo) %>% #combine two phyloseq objects created above
  select(-starts_with("lactoba")) #get rid of lactococcus
## Joining, by = c("participant_id", "study_week", "semester")
lactococcus
## # A tibble: 54 x 15
##    participant_id study_week semester `lactococcus la… lactococcus
##    <chr>          <chr>      <chr>               <dbl>       <dbl>
##  1 U500           week1      Winter2…             0              0
##  2 U500           week3      Winter2…             0              0
##  3 U501           week1      Winter2…             0              0
##  4 U501           week3      Winter2…             5.67           0
##  5 U502           week1      Winter2…             0              0
##  6 U502           week3      Winter2…             0              0
##  7 U507           week1      Winter2…            17              0
##  8 U507           week3      Winter2…             0.5            0
##  9 U509           week1      Winter2…             0              0
## 10 U509           week3      Winter2…             8.67           0
## # … with 44 more rows, and 10 more variables: `lactococcus
## #   raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## #   garvieae` <dbl>, status <chr>, supplement_consumed <chr>,
## #   use_data <chr>, quantity_compliant <chr>, frequency <chr>,
## #   ph_median <dbl>, ph_mean <dbl>
#Calculate Row Sums
lactococcus_total <-  rowSums(lactococcus[,4:8]) 
lactococcus_total
##  [1]  0.00  0.00  0.00  5.67  0.00  0.00 17.00  0.50  0.00  8.67  4.00
## [12]  3.67  0.00  0.00  1.67  0.00  3.00 20.67  0.00  0.00  0.67  1.67
## [23]  0.00  0.00  0.67  1.67  1.33  0.00  2.00  0.00  7.00  0.00  3.67
## [34]  0.00  0.00  0.00  0.00 15.00  3.67 10.00  4.67  3.67  0.33  0.00
## [45]  0.33  0.00  4.00  0.00  0.33  1.67  6.50  0.67  5.67  4.00
#Mutate Row Sums into Previous Table
new_lactococcus <- lactococcus %>% 
  mutate(lactococcus = lactococcus_total) %>%
  select(participant_id, study_week, semester, lactococcus, ph_median, ph_mean, frequency, supplement_consumed)
new_lactococcus
## # A tibble: 54 x 8
##    participant_id study_week semester lactococcus ph_median ph_mean
##    <chr>          <chr>      <chr>          <dbl>     <dbl>   <dbl>
##  1 U500           week1      Winter2…        0         6.6     6.6 
##  2 U500           week3      Winter2…        0         6.45    6.45
##  3 U501           week1      Winter2…        0         6.6     6.6 
##  4 U501           week3      Winter2…        5.67      6.2     6.2 
##  5 U502           week1      Winter2…        0         6.4     6.4 
##  6 U502           week3      Winter2…        0         6.4     6.4 
##  7 U507           week1      Winter2…       17         6.7     6.7 
##  8 U507           week3      Winter2…        0.5       6.4     6.4 
##  9 U509           week1      Winter2…        0         6.8     6.8 
## 10 U509           week3      Winter2…        8.67      6       6   
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## #   supplement_consumed <chr>
#Correlation Data With Graph
wk_3_lactococcus <- new_lactococcus %>%
  filter(study_week == "week3")


wk_3_lactococcus_plot <- wk_3_lactococcus %>%
  ggplot(aes(x = ph_mean,
             y = lactococcus)) + 
  geom_point() + #puts data points to match x and y coordinates
  geom_smooth(method = "lm", #used to create a linear best fit line
              se = FALSE) + #hides confidence interval around line 
  xlab("Week 3 Mean pH") + 
  ylab("Relative Abundance of Lactococcus Bacteria") 
wk_3_lactococcus_plot

#Correlation Test
lactococcus_correlation <- wk_3_lactococcus %>%
  lm(ph_mean ~ lactococcus, data = .) #test relationship
summary(lactococcus_correlation) #view results 
## 
## Call:
## lm(formula = ph_mean ~ lactococcus, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9090 -0.3441  0.1001  0.3160  1.0910 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.10904    0.10678  57.212   <2e-16 ***
## lactococcus  0.01909    0.01838   1.039    0.309    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4817 on 25 degrees of freedom
## Multiple R-squared:  0.04137,    Adjusted R-squared:  0.00302 
## F-statistic: 1.079 on 1 and 25 DF,  p-value: 0.3089
lactococcus_correlation
## 
## Call:
## lm(formula = ph_mean ~ lactococcus, data = .)
## 
## Coefficients:
## (Intercept)  lactococcus  
##     6.10904      0.01909
#p-value = .3089
#R-squared = .04137

Just looking at the scatter plot we can see that there does not appear to be any correlation or pattern between the dots, as they appear to be randomly scattered. Our statistical analysis confirms this as the large p-value (p-value = .3089) indicates that the null hypothesis is true meaning there is no correlation between pH and Lactococcus genre bacteria. Additionally, the small r-squared value indicates that only about 4.1% of the data can be truly represented by the line of best fit, which is extremely small and inaccurate. If there were to be a correlation, it would be a positive one according to the statistical analysis, which is opposite to what we hypothesized, as here the number of Lactococcus bacteria increases as pH increases.

We figured since everybody has a different microbiome, that some individuals may contain Lactococcus or just Lactobacillus genre bacteria and since they perform similar functions we thought it would be reasonable to combine them into one data set and see if there was a correlation between “Lacto” Bacteria and pH to compensate for different microbiome makeups.

#"Lacto" Genre Bacteria 


#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
  filter(supplement_consumed == "BRMPS",
         frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
##    participant_id study_week status semester supplement_cons… use_data
##    <chr>          <chr>      <chr>  <chr>    <chr>            <chr>   
##  1 U445           week3      during Fall2017 BRMPS            yes     
##  2 U446           week3      during Fall2017 BRMPS            yes     
##  3 U448           week3      during Fall2017 BRMPS            yes     
##  4 U449           week3      during Fall2017 BRMPS            yes     
##  5 U450           week3      during Fall2017 BRMPS            yes     
##  6 U452           week3      during Fall2017 BRMPS            yes     
##  7 U453           week3      during Fall2017 BRMPS            yes     
##  8 U455           week3      during Fall2017 BRMPS            yes     
##  9 U458           week3      during Fall2017 BRMPS            yes     
## 10 U460           week3      during Fall2017 BRMPS            yes     
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## #   frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactobacteria <- inner_join(seq_var_table, brmps_2x_phylo) 
## Joining, by = c("participant_id", "study_week", "semester")
lactobacteria
## # A tibble: 54 x 51
##    participant_id study_week semester `lactococcus la… lactococcus
##    <chr>          <chr>      <chr>               <dbl>       <dbl>
##  1 U500           week1      Winter2…             0              0
##  2 U500           week3      Winter2…             0              0
##  3 U501           week1      Winter2…             0              0
##  4 U501           week3      Winter2…             5.67           0
##  5 U502           week1      Winter2…             0              0
##  6 U502           week3      Winter2…             0              0
##  7 U507           week1      Winter2…            17              0
##  8 U507           week3      Winter2…             0.5            0
##  9 U509           week1      Winter2…             0              0
## 10 U509           week3      Winter2…             8.67           0
## # … with 44 more rows, and 46 more variables: `lactococcus
## #   raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## #   garvieae` <dbl>, `lactobacillus pontis` <dbl>, `lactobacillus
## #   murinus` <dbl>, `lactobacillus sanfranciscensis` <dbl>, `lactobacillus
## #   johnsonii` <dbl>, lactobacillaceae <dbl>, `lactobacillus
## #   ruminis` <dbl>, `lactobacillus salivarius` <dbl>, `lactobacillus
## #   timonensis` <dbl>, lactobacillus <dbl>, `lactobacillus
## #   crispatus/gallinarum` <dbl>, `lactobacillus gallinarum` <dbl>,
## #   `lactobacillus iners` <dbl>, `lactobacillus vaginalis` <dbl>,
## #   `lactobacillus crispatus` <dbl>, `lactobacillus
## #   apodemi/murinus` <dbl>, `lactobacillus fermentum` <dbl>,
## #   `lactobacillus gasseri/johnsonii` <dbl>, lactobacillales <dbl>,
## #   `lactobacillus casei` <dbl>, `lactobacillus brevis` <dbl>,
## #   `lactobacillus buchneri` <dbl>, `lactobacillus delbrueckii` <dbl>,
## #   `lactobacillus apis` <dbl>, `lactobacillus hokkaidonensis` <dbl>,
## #   `lactobacillus agilis` <dbl>, `lactobacillus casei group` <dbl>,
## #   `lactobacillus rhamnosus` <dbl>, `lactobacillus
## #   kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## #   `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## #   `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## #   `lactobacillus algidus` <dbl>, `lactobacillus
## #   acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>,
## #   status <chr>, supplement_consumed <chr>, use_data <chr>,
## #   quantity_compliant <chr>, frequency <chr>, ph_median <dbl>,
## #   ph_mean <dbl>
#Calculate Row Sums
lactobacteria_total <-  rowSums(lactobacteria[,4:44]) 
lactobacteria_total
##  [1]  22.00 280.50   0.00   5.67   0.00   0.00  17.00   0.50   5.33  22.34
## [11]   4.00   3.67   0.00  48.50   6.67  81.33   4.00  20.67   0.00 115.50
## [21]   0.67   1.67  59.00  56.00   0.67   4.00   1.33   0.00 190.00  31.33
## [31]  43.50 474.66  30.67   6.67   0.00   0.00   0.00  39.00  27.01  57.66
## [41]   5.34   5.34   3.33   0.00   0.33   0.00   4.00 340.00  29.99  79.66
## [51] 122.50 566.34   5.67   4.67
#Mutate Row Sums into Previous Table
new_lactobacteria <- lactobacteria %>% 
  mutate(lactobacteria = lactobacteria_total) %>%
  select(participant_id, study_week, semester, lactobacteria, ph_median, ph_mean, frequency, supplement_consumed)
new_lactobacteria
## # A tibble: 54 x 8
##    participant_id study_week semester lactobacteria ph_median ph_mean
##    <chr>          <chr>      <chr>            <dbl>     <dbl>   <dbl>
##  1 U500           week1      Winter2…         22         6.6     6.6 
##  2 U500           week3      Winter2…        280.        6.45    6.45
##  3 U501           week1      Winter2…          0         6.6     6.6 
##  4 U501           week3      Winter2…          5.67      6.2     6.2 
##  5 U502           week1      Winter2…          0         6.4     6.4 
##  6 U502           week3      Winter2…          0         6.4     6.4 
##  7 U507           week1      Winter2…         17         6.7     6.7 
##  8 U507           week3      Winter2…          0.5       6.4     6.4 
##  9 U509           week1      Winter2…          5.33      6.8     6.8 
## 10 U509           week3      Winter2…         22.3       6       6   
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## #   supplement_consumed <chr>
#Correlation Data With Graph
wk_3_lactobacteria <- new_lactobacteria %>%
  filter(study_week == "week3")


wk_3_lactobacteria_plot <- wk_3_lactobacteria %>%
  ggplot(aes(x = ph_mean,
             y = lactobacteria)) + 
  geom_point() + #puts data points to match x and y coordinates
  geom_smooth(method = "lm", #used to create a linear best fit line
              se = FALSE) + #hides confidence interval around line 
  xlab("Week 3 Mean pH") + 
  ylab("Relative Abundance of Lactate Producers") 
wk_3_lactobacteria_plot

#Correlation Test
lactobacteria_correlation <- wk_3_lactobacteria %>%
  lm(ph_mean ~ lactobacteria, data = .) #test relationship
summary(lactobacteria_correlation) #view results 
## 
## Call:
## lm(formula = ph_mean ~ lactobacteria, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9631 -0.3334  0.1928  0.2740  1.0488 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.1247251  0.1074485  57.002   <2e-16 ***
## lactobacteria 0.0004724  0.0006326   0.747    0.462    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4866 on 25 degrees of freedom
## Multiple R-squared:  0.02182,    Adjusted R-squared:  -0.01731 
## F-statistic: 0.5577 on 1 and 25 DF,  p-value: 0.4622
lactobacteria_correlation
## 
## Call:
## lm(formula = ph_mean ~ lactobacteria, data = .)
## 
## Coefficients:
##   (Intercept)  lactobacteria  
##     6.1247251      0.0004724
#p-value = .4622
#R-squared = .02182

Just looking at the scatter plot we can see that there does not appear to be any correlation or pattern between the dots, as they appear to be randomly scattered. Our statistical analysis confirms this as the large p-value (p-value = .4622) indicates that the null hypothesis is true meaning there is no correlation between pH and “Lacto” bacteria. Additionally, the small r-squared value indicates that only about 2.2% of the data can be truly represented by the line of best fit, which is extremely small and inaccurate. If there were to be a correlation, it would be a positive one according to the statistical analysis, which is opposite to what we hypothesized, as here the number of “Lacto” bacteria increases as pH increases.

#Did the Number of Lactate Producers Rise?

Because our correlation tests did not find any significant results, we decided to look a little deeper to see if the supplement BRMPS taken 2x daily did at least increase the number of Lactate producers (Lactococcus and Lactobacillus bacteria). These Lactate producers may not be correlated with more acidic conditions, but they confer a variety of health benefits such as decreased gut permeability (inhibiting toxic bacterial biproducts from being absorbed) as well as disease resistance and increased immunity. Additionally, they produce acidic biproducts which may contribute to lower gut pH.

#Import Individual Data
brmps_2x_phylo <- curated_ph_wkly %>%
  filter(supplement_consumed == "BRMPS",
         frequency == "2xdaily")
brmps_2x_phylo
## # A tibble: 84 x 10
##    participant_id study_week status semester supplement_cons… use_data
##    <chr>          <chr>      <chr>  <chr>    <chr>            <chr>   
##  1 U445           week3      during Fall2017 BRMPS            yes     
##  2 U446           week3      during Fall2017 BRMPS            yes     
##  3 U448           week3      during Fall2017 BRMPS            yes     
##  4 U449           week3      during Fall2017 BRMPS            yes     
##  5 U450           week3      during Fall2017 BRMPS            yes     
##  6 U452           week3      during Fall2017 BRMPS            yes     
##  7 U453           week3      during Fall2017 BRMPS            yes     
##  8 U455           week3      during Fall2017 BRMPS            yes     
##  9 U458           week3      during Fall2017 BRMPS            yes     
## 10 U460           week3      during Fall2017 BRMPS            yes     
## # … with 74 more rows, and 4 more variables: quantity_compliant <chr>,
## #   frequency <chr>, ph_median <dbl>, ph_mean <dbl>
#Combine to Make New Data Sets
lactobacteria <- inner_join(seq_var_table, brmps_2x_phylo) 
## Joining, by = c("participant_id", "study_week", "semester")
lactobacteria
## # A tibble: 54 x 51
##    participant_id study_week semester `lactococcus la… lactococcus
##    <chr>          <chr>      <chr>               <dbl>       <dbl>
##  1 U500           week1      Winter2…             0              0
##  2 U500           week3      Winter2…             0              0
##  3 U501           week1      Winter2…             0              0
##  4 U501           week3      Winter2…             5.67           0
##  5 U502           week1      Winter2…             0              0
##  6 U502           week3      Winter2…             0              0
##  7 U507           week1      Winter2…            17              0
##  8 U507           week3      Winter2…             0.5            0
##  9 U509           week1      Winter2…             0              0
## 10 U509           week3      Winter2…             8.67           0
## # … with 44 more rows, and 46 more variables: `lactococcus
## #   raffinolactis` <dbl>, `lactococcus piscium` <dbl>, `lactococcus
## #   garvieae` <dbl>, `lactobacillus pontis` <dbl>, `lactobacillus
## #   murinus` <dbl>, `lactobacillus sanfranciscensis` <dbl>, `lactobacillus
## #   johnsonii` <dbl>, lactobacillaceae <dbl>, `lactobacillus
## #   ruminis` <dbl>, `lactobacillus salivarius` <dbl>, `lactobacillus
## #   timonensis` <dbl>, lactobacillus <dbl>, `lactobacillus
## #   crispatus/gallinarum` <dbl>, `lactobacillus gallinarum` <dbl>,
## #   `lactobacillus iners` <dbl>, `lactobacillus vaginalis` <dbl>,
## #   `lactobacillus crispatus` <dbl>, `lactobacillus
## #   apodemi/murinus` <dbl>, `lactobacillus fermentum` <dbl>,
## #   `lactobacillus gasseri/johnsonii` <dbl>, lactobacillales <dbl>,
## #   `lactobacillus casei` <dbl>, `lactobacillus brevis` <dbl>,
## #   `lactobacillus buchneri` <dbl>, `lactobacillus delbrueckii` <dbl>,
## #   `lactobacillus apis` <dbl>, `lactobacillus hokkaidonensis` <dbl>,
## #   `lactobacillus agilis` <dbl>, `lactobacillus casei group` <dbl>,
## #   `lactobacillus rhamnosus` <dbl>, `lactobacillus
## #   kefiri/otakiensis` <dbl>, `lactobacillus helveticus` <dbl>,
## #   `lactobacillus versmoldensis` <dbl>, `lactobacillus plantarum` <dbl>,
## #   `lactobacillus sakei` <dbl>, `lactobacillus reuteri` <dbl>,
## #   `lactobacillus algidus` <dbl>, `lactobacillus
## #   acidophilus/crispatus` <dbl>, `lactobacillus mucosae` <dbl>,
## #   status <chr>, supplement_consumed <chr>, use_data <chr>,
## #   quantity_compliant <chr>, frequency <chr>, ph_median <dbl>,
## #   ph_mean <dbl>
#Calculate Row Sums
lactobacteria_total <-  rowSums(lactobacteria[,4:44]) 
lactobacteria_total
##  [1]  22.00 280.50   0.00   5.67   0.00   0.00  17.00   0.50   5.33  22.34
## [11]   4.00   3.67   0.00  48.50   6.67  81.33   4.00  20.67   0.00 115.50
## [21]   0.67   1.67  59.00  56.00   0.67   4.00   1.33   0.00 190.00  31.33
## [31]  43.50 474.66  30.67   6.67   0.00   0.00   0.00  39.00  27.01  57.66
## [41]   5.34   5.34   3.33   0.00   0.33   0.00   4.00 340.00  29.99  79.66
## [51] 122.50 566.34   5.67   4.67
#Mutate Row Sums into Previous Table
new_lactobacteria <- lactobacteria %>% 
  mutate(lactobacteria = lactobacteria_total) %>%
  select(participant_id, study_week, semester, lactobacteria, ph_median, ph_mean, frequency, supplement_consumed)
new_lactobacteria
## # A tibble: 54 x 8
##    participant_id study_week semester lactobacteria ph_median ph_mean
##    <chr>          <chr>      <chr>            <dbl>     <dbl>   <dbl>
##  1 U500           week1      Winter2…         22         6.6     6.6 
##  2 U500           week3      Winter2…        280.        6.45    6.45
##  3 U501           week1      Winter2…          0         6.6     6.6 
##  4 U501           week3      Winter2…          5.67      6.2     6.2 
##  5 U502           week1      Winter2…          0         6.4     6.4 
##  6 U502           week3      Winter2…          0         6.4     6.4 
##  7 U507           week1      Winter2…         17         6.7     6.7 
##  8 U507           week3      Winter2…          0.5       6.4     6.4 
##  9 U509           week1      Winter2…          5.33      6.8     6.8 
## 10 U509           week3      Winter2…         22.3       6       6   
## # … with 44 more rows, and 2 more variables: frequency <chr>,
## #   supplement_consumed <chr>
#Weekly Data 
wk_1_lactobacteria <- new_lactobacteria %>%
  filter(study_week == "week1")


wk_3_lactobacteria <- new_lactobacteria %>%
  filter(study_week == "week3")


#Normality
shapiro.test(wk_1_lactobacteria$lactobacteria) 
## 
##  Shapiro-Wilk normality test
## 
## data:  wk_1_lactobacteria$lactobacteria
## W = 0.5561, p-value = 6.575e-08
shapiro.test(wk_3_lactobacteria$lactobacteria) 
## 
##  Shapiro-Wilk normality test
## 
## data:  wk_3_lactobacteria$lactobacteria
## W = 0.60405, p-value = 2.305e-07
#Normality Plots
ggplot(wk_1_lactobacteria, aes(x = lactobacteria)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(wk_3_lactobacteria, aes(x = lactobacteria)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

qqnorm(wk_1_lactobacteria$lactobacteria); qqline(wk_1_lactobacteria$lactobacteria)

qqnorm(wk_3_lactobacteria$lactobacteria); qqline(wk_3_lactobacteria$lactobacteria)

#Final Joined Data
stat_lactobacteria <- inner_join(x = wk_1_lactobacteria, y = wk_3_lactobacteria,
                    by = c("participant_id", "frequency", 
                           "semester", "supplement_consumed")) %>%
  rename(lactobacteria_wk1_abund = lactobacteria.x,
         lactobacteria_wk3_abund = lactobacteria.y) %>%
  select(-starts_with("study_week"))


#Sample Size + Varience Assumptions
str(stat_lactobacteria) # n=26
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 26 obs. of  10 variables:
##  $ participant_id         : chr  "U500" "U501" "U502" "U507" ...
##  $ semester               : chr  "Winter2018" "Winter2018" "Winter2018" "Winter2018" ...
##  $ lactobacteria_wk1_abund: num  22 0 0 17 5.33 4 0 6.67 4 0 ...
##  $ ph_median.x            : num  6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
##  $ ph_mean.x              : num  6.6 6.6 6.4 6.7 6.8 6.8 6.5 6.4 6.9 6 ...
##  $ frequency              : chr  "2xdaily" "2xdaily" "2xdaily" "2xdaily" ...
##  $ supplement_consumed    : chr  "BRMPS" "BRMPS" "BRMPS" "BRMPS" ...
##  $ lactobacteria_wk3_abund: num  280.5 5.67 0 0.5 22.34 ...
##  $ ph_median.y            : num  6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
##  $ ph_mean.y              : num  6.45 6.2 6.4 6.4 6 6.4 6.4 5.2 6.85 5.4 ...
var.test(x = stat_lactobacteria$lactobacteria_wk1_abund, 
         y = stat_lactobacteria$lactobacteria_wk3_abund, 
         alternative = "greater") #p-value= 1
## 
##  F test to compare two variances
## 
## data:  stat_lactobacteria$lactobacteria_wk1_abund and stat_lactobacteria$lactobacteria_wk3_abund
## F = 0.080474, num df = 25, denom df = 25, p-value = 1
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
##  0.04115382        Inf
## sample estimates:
## ratio of variances 
##         0.08047412
#Statistical Tests 
wilcox.test(x = stat_lactobacteria$lactobacteria_wk1_abund, 
            y = stat_lactobacteria$lactobacteria_wk3_abund, 
            alternative = "less", paired = TRUE, var.equal = TRUE)
## Warning in wilcox.test.default(x =
## stat_lactobacteria$lactobacteria_wk1_abund, : cannot compute exact p-value
## with zeroes
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  stat_lactobacteria$lactobacteria_wk1_abund and stat_lactobacteria$lactobacteria_wk3_abund
## V = 58, p-value = 0.007803
## alternative hypothesis: true location shift is less than 0
#p-value= .007803


#Violin Plot for Visualization
brmps_2x_lactobacteria_plot <- new_lactobacteria %>%
  filter(study_week == "week1" | study_week == "week3") %>% 
  ggplot(aes(x = study_week, 
             y = lactobacteria)) + 
  geom_violin(aes(color = study_week)) +
  geom_jitter(aes(color = study_week))+
  labs(x = "Study Week",
       y = "Lacto Bacteria Relative Abundance",
       title = "Weekly Comparison of Abundances of Lacto Bacteria") +
    theme(legend.position = "none")
brmps_2x_lactobacteria_plot

#Save Plot
save_plot(filename = "brmps_2x_lactobacteria_plot.pdf",
          plot = brmps_2x_lactobacteria_plot,
          nrow = 1, ncol = 1,
          base_aspect_ratio = 1.1)